## ---- Processing ----
# Lookup from outcome variable name (as it appears in the `Outcome` column of
# the estimation output) to the label printed in the tables. The order of the
# entries matters: it is reused as the factor level order for `Outcome`.
dict <- c(
  # Individual and household characteristics
  c(
    "fem" = "Women",
    "age" = "Age",
    "hh_mmb_umn" = "Household members",
    "hh_prop_ecact_umn" = "Proportion econ. active household members",
    "log_hinc_eq_umn" = "Log. monthly household income (equiv.)"
  ),
  # Education categories
  c(
    "edu5_...Lower.Secondary" = "Education: Less than upper-secondary",
    "edu5_Upper.Secondary" = "Education: Secondary",
    "edu5_Post.Secondary.Non.Tertiary" = "Education: Post-secondary non-tertiary",
    "edu5_Higher.Vocational" = "Education: Higher vocational",
    "edu5_Higher.Education" = "Education: Tertiary"
  ),
  # Labour market participation categories
  c(
    "lm_part_Atypical_umn" = "Employment: Atypical",
    "lm_part_In_Education_umn" = "Employment: In Education",
    "lm_part_Inactive_umn" = "Employment: Inactive",
    "lm_part_Pensioners_umn" = "Employment: Retired",
    "lm_part_Unemployed_umn" = "Employment: Unemployed"
  ),
  # Rents
  c(
    "cmr_arm_umn" = "Local market rent (EUR/sqm)",
    "cold_rent_sqm_umn" = "Household rent (EUR/sqm)"
  ),
  # Locality type and regional context
  c(
    "gtyp3_rural" = "Locality: Rural",
    "gtyp3_suburban" = "Locality: Suburban",
    "gtyp3_urban" = "Locality: Urban",
    "kr_foreigner_umn" = "County-level non-citizens",
    "kr_uemprate_umn" = "County-level unemployment",
    "east" = "Lives in East Germany"
  ),
  # Party support and vote choice
  c(
    "afd" = "Supports AfD",
    "vote_afd" = "Voted for AfD",
    "cdu" = "Supports CDU/CSU",
    "fdp" = "Supports FDP",
    "gre" = "Supports Greens",
    "lef" = "Supports The Left",
    "spd" = "Supports SPD",
    "lw" = "Supports left-wing parties",
    "rw" = "Supports right-wing parties",
    "non" = "Supports no party"
  )
)

# Long table of formatted estimates, one row per outcome x subset x owner
# status, with `Mean` holding "estimate [lower, upper]" and `n` the sample size.
model_vars <-
  read.csv("est/subset_characteristics.csv") %>%
  dplyr::select(-X) %>%
  # Keep rows without a market segment whose outcome has a label in `dict`.
  dplyr::filter(is.na(`Market.Segment`)) %>%
  dplyr::filter(Outcome %in% names(dict)) %>%
  dplyr::mutate(
    # Human-readable subset labels. Rows matching none of the conditions
    # become NA (case_when's default).
    Subset = dplyr::case_when(
      is.na(Subset) ~ "All",
      Subset == "ltr_since_3 == 1" & subset_ltr == 1 ~ "Since 3 years",
      Subset == "ltr_since_5 == 1" & subset_ltr == 1 ~ "Since 5 years",
      Subset == "ltr_since_3 == 1" & subset_ltr == 0 ~ "Less than 3 years",
      Subset == "ltr_since_5 == 1" & subset_ltr == 0 ~ "Less than 5 years"
    ),
    # Outcomes listed here are multiplied by 100 (presumably shares on a 0-1
    # scale reported as percentages -- confirm against the estimation script).
    # The condition is evaluated once and applied to the point estimate and
    # both interval bounds so the three can never get out of sync.
    pct_scale = ifelse(
      Outcome %in% c(
        "fem",
        "east",
        "hh_prop_ecact_umn",
        "afd",
        "vote_afd",
        "cdu",
        "fdp",
        "gre",
        "lef",
        "spd",
        "lw",
        "rw",
        "non"
      ) |
        startsWith(Outcome, "gtyp3") |
        startsWith(Outcome, "edu5") |
        startsWith(Outcome, "lm_part"),
      100,
      1
    ),
    Prediction = Prediction * pct_scale,
    lower = lower * pct_scale,
    upper = upper * pct_scale
  ) %>%
  dplyr::mutate(
    # Replace variable names by their labels; level order follows `dict`.
    Outcome = factor(Outcome,
                     levels = names(dict),
                     labels = dict,
                     ordered = TRUE)
  ) %>%
  dplyr::arrange(owner, Subset, Outcome) %>%
  dplyr::mutate(
    # format() pads every element of a column to a common width; trimws()
    # removes that padding so cells read "5.3 [4.1, 6.2]" rather than
    # " 5.3 [ 4.1,  6.2]".
    Mean = paste0(
      trimws(format(round(Prediction, 1), nsmall = 1)),
      " [",
      trimws(format(round(lower, 1), nsmall = 1)),
      ", ",
      trimws(format(round(upper, 1), nsmall = 1)),
      "]"
    )
  ) %>%
  # Dropping all other columns also removes the temporary `pct_scale`.
  dplyr::select(Outcome,
                Subset,
                Mean,
                owner,
                n) %>%
  dplyr::distinct() %>%
  dplyr::mutate(n = as.integer(n))

# Sample sizes for renters (owner == 0): one row per count label, one column
# per subset, with the counts formatted using a thousands separator.
n_renter <- model_vars %>%
  dplyr::filter(owner == 0) %>%
  dplyr::distinct(Subset, n) %>%
  tidyr::pivot_longer(cols = n) %>%
  dplyr::group_by(Subset) %>%
  # Within each subset the first distinct n is labelled "n (full)", every
  # further one "n (vote choice)"; this relies on the row order of model_vars.
  dplyr::mutate(
    name = ifelse(dplyr::row_number() > 1, "n (vote choice)", "n (full)")
  ) %>%
  dplyr::ungroup() %>%
  tidyr::pivot_wider(names_from = Subset, values_from = value) %>%
  dplyr::rename(Outcome = name) %>%
  dplyr::mutate_all(unlist) %>%
  # Integer counts become character so they can be stacked under the
  # formatted estimates.
  dplyr::mutate_if(is.integer, .funs = ~format(., big.mark = ","))

# Sample sizes for owners (owner == 1), excluding the household-rent outcome:
# one row per count label, one column per subset, counts formatted with a
# thousands separator.
n_owner <- model_vars %>%
  dplyr::filter(owner == 1, Outcome != "Household rent (EUR/sqm)") %>%
  dplyr::distinct(Subset, n) %>%
  tidyr::pivot_longer(cols = n) %>%
  dplyr::group_by(Subset) %>%
  # Within each subset the first distinct n is labelled "n (full)", every
  # further one "n (vote choice)"; this relies on the row order of model_vars.
  dplyr::mutate(
    name = ifelse(dplyr::row_number() > 1, "n (vote choice)", "n (full)")
  ) %>%
  dplyr::ungroup() %>%
  tidyr::pivot_wider(names_from = Subset, values_from = value) %>%
  dplyr::rename(Outcome = name) %>%
  # Integer counts become character so they can be stacked under the
  # formatted estimates.
  dplyr::mutate_if(is.integer, .funs = ~format(., big.mark = ","))

# Owner table: one row per outcome (household rent excluded), one column per
# subset holding the formatted estimate, followed by the sample-size rows.
owner_summary <- model_vars %>%
  dplyr::filter(owner == 1, Outcome != "Household rent (EUR/sqm)") %>%
  dplyr::select(-n, -owner) %>%
  dplyr::distinct() %>%
  tidyr::pivot_wider(names_from = Subset, values_from = Mean) %>%
  dplyr::bind_rows(n_owner)

# Renter table: one row per outcome, one column per subset holding the
# formatted estimate, followed by the sample-size rows.
renter_summary <- model_vars %>%
  dplyr::filter(owner == 0) %>%
  dplyr::select(-n, -owner) %>%
  dplyr::distinct() %>%
  tidyr::pivot_wider(names_from = Subset, values_from = Mean) %>%
  dplyr::bind_rows(n_renter)

## ---- TeX output ----
## Renters
# Write the renter table to tex/renter_sum_2024.tex as a rotated float.
print(
  xtable(
    renter_summary,
    caption = "Characterization of renters (all) and renter subsets in terms of covariates: Means/proportions and 95\\% confidence intervals.",
    label = "tab:renter_sum",
    # One entry for the (suppressed) row names, then label column + 5 subsets.
    align = c("l", "l", rep("c", 5))
  ),
  file = "tex/renter_sum_2024.tex",
  type = "latex",
  floating = TRUE,
  floating.environment = "sidewaystable",
  tabular.environment = "tabular",
  size = "\\footnotesize",
  comment = TRUE,
  include.rownames = FALSE,
  include.colnames = TRUE,
  # Cell contents are passed through unescaped.
  sanitize.text.function = identity,
  # Double rule at top and bottom, single rule below the header and above
  # the last two rows (presumably the two sample-size rows).
  hline.after = c(
    rep(-1, 2),
    0,
    nrow(renter_summary) - c(2, 0, 0)
  )
)

## Owners
# Write the owner table to tex/owner_sum_2024.tex as a rotated float.
print(
  xtable(
    owner_summary,
    caption = "Characterization of owners (all) and owner subsets in terms of covariates: Means/proportions and 95\\% confidence intervals.",
    label = "tab:owner_sum",
    # One entry for the (suppressed) row names, then label column + 5 subsets.
    align = c("l", "l", rep("c", 5))
  ),
  file = "tex/owner_sum_2024.tex",
  type = "latex",
  floating = TRUE,
  floating.environment = "sidewaystable",
  tabular.environment = "tabular",
  size = "\\footnotesize",
  comment = TRUE,
  include.rownames = FALSE,
  include.colnames = TRUE,
  # Cell contents are passed through unescaped.
  sanitize.text.function = identity,
  # Double rule at top and bottom, single rule below the header and above
  # the last two rows (presumably the two sample-size rows).
  hline.after = c(
    rep(-1, 2),
    0,
    nrow(owner_summary) - c(2, 0, 0)
  )
)